****************************************************
*****Performance and Payment at the Global Fund*****
*********CGD POLICY PAPER, APRIL 2013***************
*************REPLICATION DO FILE********************

**Do file for replicating regression analyses in CGD Policy Paper, "Performance and Payment at the Global Fund"

/*LIST OF RAW DATASETS AND BRIEF DESCRIPTIONS 
GFgrants-raw.dta // Global Fund's raw grants dataset, July 2012
GFdisb-raw.dta // Global Fund's raw disbursements dataset, July 2012
renewals-11dec12.dta // Global Fund's raw grant renewals - phase 2 dataset, December 2012
country-characteristics-raw.dta //  A country characteristics dataset [see codebook for more details] , September 2012
codes_masterlist.dta // A list of different country codes and spellings to help with merging
*/

* Folder layout used by the whole do-file:
*   input  - folder holding all raw files
*   output - folder receiving every dataset this file creates
*   regs   - folder for the regression tables
global regs "N:\Health\Value for Money\Database\PBF-Replication\Regressions"
global output "N:\Health\Value for Money\Database\PBF-Replication\Output"
global input "N:\Health\Value for Money\Database\PBF-Replication"

* Work from the raw-data folder so the raw files can be opened by bare file name
cd "$input"

******************************************************
** 1. Extract and clean Global Fund grants database **
******************************************************

*Downloaded from the Global Fund's database on July 11, 2012. http://www.theglobalfund.org/en/. "Grants in Details (Raw Data) (XLS 559 KB)" and "Disbursements in Details (Raw Data) (XLS 1852 KB)" 
*Excel sheet is copied and pasted into Stata

* Load the raw grants file, turn each month-day-year string date into a numeric
* Stata daily date (suffix f), derive the calendar year of each date, and save
* the result sorted by grant number as GFgrants-cleaned.dta.
use GFgrants-raw.dta, clear

local datevars gasigdate progstartdate latestdisbursedate p2approvaldate p2sigdatepr p2sigdategf gaenddate gapropcompdate gasigdateprrcc1 gasigdategfrcc1

* One numeric daily-date copy per string date, displayed in %td format
foreach v of local datevars {
	gen `v'f = date(`v', "MDY")
	format `v'f %td
}

label var gasigdatef "Grant Agreement Signing Date"
label var progstartdatef "Program Start Date"
label var latestdisbursedatef "Latest Disbursement Date"
label var p2approvaldatef "Phase 2 Approval Date"
label var p2sigdateprf "Phase 2 Sign Date by PR"
label var p2sigdategff "Phase 2 Sign Date by GF"
* NOTE(review): this label text looks copied from the Phase 2 variables; confirm
* what gaenddate actually records. Label left unchanged.
label var gaenddatef "Phase 2 End Date by GF"
label var gapropcompdatef "Proposal Completed Date"
label var gasigdateprrcc1f "PR Sign Date for RCC Agreement"
label var gasigdategfrcc1f "GF Sign Date for RCC Agreement"

* Calendar year of each date. The signing-date year is named gasigyear, not
* gasigdateyear, so it is generated outside the loop.
gen gasigyear = year(gasigdatef)
foreach v in progstartdate latestdisbursedate p2approvaldate p2sigdatepr p2sigdategf gaenddate gapropcompdate gasigdateprrcc1 gasigdategfrcc1 {
	gen `v'year = year(`v'f)
}

sort gagrantno
* replace lets the do-file be rerun when the output file already exists
save "$output/GFgrants-cleaned.dta", replace


*************************************************************
** 2. Extract and clean Global Fund disbursements database **
*************************************************************
 
*Downloaded from the Global Fund's database on July 11, 2012. http://www.theglobalfund.org/en/. "Disbursements in Details (Raw Data) (XLS 1852 KB)" 
*Data copied and pasted from Excel into Stata

* Load the raw disbursements file, parse the disbursement date, convert the
* letter performance rating to a 1-5 score, code the disease component, and
* save the result as GFdisb-cleaned.dta.
* clear makes this section runnable whatever is currently in memory.
use GFdisb-raw.dta, clear

gen gddatef = date(gddate, "MDY")
format gddatef %td
label var gddatef "Disbursement Date"

* Calendar year of the disbursement
gen year = year(gddatef)

* Rating to score: C=1, B2=2, B1=3, A2=4, A1=5, A=5.
* Any other rating, including blank and NR, stays missing.
gen gfscore=.
replace gfscore=1 if gfrating=="C"
replace gfscore=2 if gfrating=="B2"
replace gfscore=3 if gfrating=="B1"
replace gfscore=4 if gfrating=="A2"
replace gfscore=5 if gfrating=="A1"
replace gfscore=5 if gfrating=="A"

* Disease group: 1 HIV/AIDS or HIV/TB, 2 TB, 3 Malaria, 4 any other non-blank
* component (labelled HSS). A blank component stays missing.
g dis=1 if gadisease_ == "HIV/AIDS" | gadisease_ == "HIV/TB"
replace dis=2 if gadisease_ == "TB"
replace dis=3 if gadisease_ == "Malaria"
replace dis=4 if gadisease_ != "" & dis == .
label define dis 1 "HIV/AIDS, HIV&TB" 2 "TB" 3 "Malaria" 4 "HSS"
label values dis dis
tab dis, g(disdum)
label var dis "GF Disease"

* replace lets the do-file be rerun when the output file already exists
save "$output/GFdisb-cleaned.dta", replace

* Attach the grant-level dates to every disbursement row.
* Master: one row per disbursement. Using: the cleaned grants file.
* m:1 states the intended relationship and stops with an error if the grants
* file ever holds more than one row per grant number, instead of pairing rows
* silently the way the old merge syntax did.
use "$output/GFdisb-cleaned.dta", clear
merge m:1 gagrantno using "$output/GFgrants-cleaned.dta"
* replace lets the do-file be rerun when the output file already exists
save "$output/GFgrantdates.dta", replace

* Grant-level summaries of the performance score over ALL disbursements:
*   mean    -> averagescore  (collapsed_mean_score.dta)
*   firstnm -> firstscore    (collapsed_firstnm_score.dta)
*   lastnm  -> lastscore     (collapsed_lastnm_score.dta)
local name_mean averagescore
local name_firstnm firstscore
local name_lastnm lastscore

foreach stat in mean firstnm lastnm {
	use "$output/GFgrantdates.dta", clear
	* firstnm and lastnm follow the row order inside each grant, so put the
	* rows in chronological order first. Without this, first and last score
	* depend on an arbitrary within-grant order. The mean is unaffected.
	sort gagrantno gddatef, stable
	collapse (`stat') gfscore, by(gagrantno)
	rename gfscore `name_`stat''
	count
	* replace lets the do-file be rerun when the output file already exists
	save "$output/collapsed_`stat'_score.dta", replace
}

* Grant-level score summaries split by phase.
* Phase 1 rows: disbursement date before the GF Phase 2 signing date.
* Phase 2 rows: disbursement date on or after that signing date.
* NOTE(review): Stata treats missing as larger than any number. A grant with no
* Phase 2 signing date therefore has all its dated disbursements counted as
* Phase 1, and a row with a missing disbursement date passes the Phase 2
* filter. Left as is because it feeds the published estimates; confirm intent.

* Phase 1: last and first non-missing score
foreach pair in "lastnm last" "firstnm first" {
	local stat : word 1 of `pair'
	local short : word 2 of `pair'
	use "$output/GFgrantdates.dta", clear
	keep if gddatef<p2sigdategff
	* chronological order so that first and last mean earliest and latest
	sort gagrantno gddatef, stable
	collapse (`stat') gfscore, by(gagrantno)
	rename gfscore phase1`short'score
	count
	save "$output/collapsed_`stat'_score_phase1.dta", replace
}

* Phase 2: first non-missing score
use "$output/GFgrantdates.dta", clear
keep if gddatef >= p2sigdategff
sort gagrantno gddatef, stable
collapse (firstnm) gfscore, by(gagrantno)
rename gfscore phase2firstscore
count
save "$output/collapsed_firstnm_score_phase2.dta", replace

* Phase 1: mean score
use "$output/GFgrantdates.dta", clear
keep if gddatef<p2sigdategff
count
collapse (mean) gfscore, by(gagrantno)
count
rename gfscore phase1averagescore
save "$output/collapsed_mean_score_phase1.dta", replace

* Phase 1: number of disbursement rows per grant
use "$output/GFgrantdates.dta", clear
keep if gddatef<p2sigdategff
count
g ph1numdisb = 1
collapse (sum) ph1numdisb, by(gagrantno)
count
save "$output/collapsed_num_disb_phase1.dta", replace

* Phase 1 disbursement amounts per grant: mean and sum of disbusdequiv over
* the rows dated before the GF Phase 2 signing date.
foreach type in mean sum {
	use "$output/GFgrantdates.dta",clear
	keep if gddatef<p2sigdategff
	count
	collapse (`type') disbusdequiv, by(gagrantno)
	rename disbusdequiv disb_`type'_phase1
	count
	* replace lets the do-file be rerun when the output file already exists
	save "$output/collapsed_`type'_disb_phase1.dta", replace
}

* Phase 2 mean score per grant, over the rows dated on or after the GF Phase 2
* signing date.
* NOTE(review): a row with a missing disbursement date also passes this filter
* because missing compares as larger than any date; confirm this is intended.
use "$output/GFgrantdates.dta",clear
keep if gddatef>=p2sigdategff
count
collapse (mean) gfscore, by(gagrantno)
rename gfscore phase2averagescore
count
* replace lets the do-file be rerun when the output file already exists
save "$output/collapsed_mean_score_phase2.dta", replace

* Phase 2 number of disbursement rows per grant
use "$output/GFgrantdates.dta",clear
keep if gddatef>=p2sigdategff
count
g ph2numdisb = 1
collapse (sum) ph2numdisb, by(gagrantno)
count
save "$output/collapsed_num_disb_phase2.dta", replace

* Disbursement amounts per grant, mean and sum of disbusdequiv:
*   phase2  - rows dated on or after the GF Phase 2 signing date
*   overall - all rows
foreach scope in phase2 overall {
	foreach type in mean sum {
		use "$output/GFgrantdates.dta",clear
		if "`scope'" == "phase2" {
			keep if gddatef>=p2sigdategff
		}
		count
		collapse (`type') disbusdequiv, by(gagrantno)
		rename disbusdequiv disb_`type'_`scope'
		count
		* replace lets the do-file be rerun when the output file already exists
		save "$output/collapsed_`type'_disb_`scope'.dta", replace
	}
}

* Bring every grant-level summary back onto the raw grants file.
* Each using file was produced by collapse with by(gagrantno), so it has exactly
* one row per grant number; m:1 states that and needs no prior sort.
use GFgrants-raw.dta,clear
merge m:1 gagrantno using "$output/collapsed_mean_score.dta"
tab _merge

* Overall and Phase 1 scores, then the Phase 1 disbursement count (merged last
* in this loop so that _merge describes it afterwards)
foreach thing in lastnm_score lastnm_score_phase1 firstnm_score firstnm_score_phase1 mean_score_phase1 num_disb_phase1 {
	drop _merge
	merge m:1 gagrantno using "$output/collapsed_`thing'.dta"
	tab _merge
}
* 1 when the grant matched a row in the Phase 1 disbursement-count file
g ph1inprog = _merge == 3

* Phase 2 scores, then the Phase 2 disbursement count (merged last)
foreach thing in mean_score_phase2 firstnm_score_phase2 num_disb_phase2 {
	drop _merge
	merge m:1 gagrantno using "$output/collapsed_`thing'.dta"
	tab _merge
}
* 1 when the grant matched a row in the Phase 2 disbursement-count file
g ph2inprog = _merge == 3

* Disbursement amounts: Phase 1, Phase 2, then overall, mean before sum
foreach thing in mean_disb_phase1 sum_disb_phase1 mean_disb_phase2 sum_disb_phase2 mean_disb_overall sum_disb_overall {
	drop _merge
	merge m:1 gagrantno using "$output/collapsed_`thing'.dta"
	tab _merge
}

drop _merge
save "$output/GFgrants-disbscore.dta", replace


*********************************
** 3. Phase 2 renewals dataset **
*********************************

* Sort the raw renewals file by grant number and keep a sorted copy, because the
* old-style merge below needs both datasets sorted on the key.
use renewals-11dec12.dta, clear
sort gagrantno
save "$output/renewals-sorted.dta", replace
* Reload the grant-level file built in section 2 and add the renewals variables.
use "$output/GFgrants-disbscore.dta"
sort gagrantno
* NOTE(review): old-style merge syntax; it does not check whether gagrantno is
* unique in either file. Confirm the renewals file has one row per grant.
* The _merge variable it creates is kept in the saved file.
merge gagrantno using "$output/renewals-sorted.dta"
sort country
save "$output/GFgrants-disbscore-phase2.dta", replace


********************************
** 4. Merge in grants dataset **
********************************

* Add the parsed date and year variables from the cleaned grants file.
* m:m pairs rows by position within each key and is almost never the intended
* join; the cleaned grants file is a per-grant lookup, so m:1 is used. It stops
* with an error if that file has more than one row for a grant number.
use "$output/GFgrants-disbscore-phase2.dta", clear
drop _merge
merge m:1 gagrantno using "$output/GFgrants-cleaned.dta"
* Written back to the same file it was loaded from
save "$output/GFgrants-disbscore-phase2.dta", replace


*************************************************
** 5. merge in codes, GF grant characteristics **
*************************************************

* Rename the World Bank country code to ccode so it can serve as the merge key
use codes_masterlist.dta, clear
rename wb_ccode ccode
save "$output/codes_masterlist_edited.dta", replace

* Attach country codes to the grant file by country name
use "$output/GFgrants-disbscore-phase2.dta", clear
sort country
drop _merge
merge m:1 country using "$output/codes_masterlist_edited.dta"
* Zanzibar is assigned the code TZA
replace ccode="TZA" if country=="Zanzibar"

* Keep the result of the codes merge under its own name so the next merge can
* create a fresh _merge
rename _merge mergecodes
* NOTE(review): the dataset list at the top of this file names
* country-characteristics-raw.dta, but the file opened here is
* country-characteristics.dta. Confirm which name is correct.
* NOTE(review): m:m pairs rows by position within each ccode. If the country
* file has one row per ccode this behaves like m:1; confirm and switch to m:1.
merge m:m ccode using country-characteristics.dta
* Keep rows matched to country characteristics, then drop rows that carry no
* overall disbursement total
keep if _merge==3
drop if disb_sum_overall==.
* replace lets the do-file be rerun when the output file already exists
save "$output/GFdisb-countrychar.dta", replace


*****************************
** 6. Regressions/analysis **
*****************************

use "$output/GFdisb-countrychar.dta"

* Local Fund Agent group: the four named firms get codes 1-4, any other
* non-blank agent gets 5, and a blank agent stays missing.
gen lfa2 = .
replace lfa2 = 5 if lfa != ""
local code 0
foreach firm in KPMG PwC STI UNOPS {
	local ++code
	replace lfa2 = `code' if lfa == "`firm'"
}
tab lfa2
label define  lfa2 1 "KPMG" 2 "PwC" 3 "STI" 4 "UNOPS" 5 "Other"
label values lfa2 lfa2

* One indicator per agent group, labelled in code order
tab lfa2, g(lfa2dum)
sum lfa2dum*
local code 0
foreach firm in KPMG PwC STI UNOPS Other {
	local ++code
	label var lfa2dum`code' "LFA: `firm'"
}

* Principal Recipient type: one indicator per category of prtype. The labels
* below rely on tabulate numbering the ten categories in sorted order.
tab prtype, g(prtypedum)
label var prtypedum1 "Civil Society/Private Sector: Faith Based Organization (CS/PS: FBO)"
label var prtypedum2 "Civil Society/Private Sector: NGO (CS/PS: NGO)"
label var prtypedum3 "Civil Society/Private Sector: Other (CS/PS: Oth)"
label var prtypedum4 "Civil Society/Private Sector: Private Sector (CS/PS: PS)"
label var prtypedum5 "Government: Ministry of Finance (GOV: MOF)"
label var prtypedum6 "Government: Ministry of Health (GOV: MOH)"
label var prtypedum7 "Government: Other(GOV: Oth)"
label var prtypedum8 "Multilateral Organization: Other (MO: Oth)"
label var prtypedum9 "Multilateral Organization: UNDP(MO: UNDP)"
label var prtypedum10 "Third Party"

* Three-way grouping: 1 civil society, private sector or third party,
* 2 government, 3 multilateral organisation. Missing when prtype is missing.
g prtype2 = .
replace prtype2 = 1 if inlist(1, prtypedum1, prtypedum2, prtypedum3, prtypedum4, prtypedum10)
replace prtype2 = 2 if inlist(1, prtypedum5, prtypedum6, prtypedum7)
replace prtype2 = 3 if inlist(1, prtypedum8, prtypedum9)
tab prtype2
label define prtype2 1 "Civil Society/Private Sector/Third Party" 2 "Government" 3 "Multilateral Org"
label values prtype2 prtype2

tab prtype2, g(prtype2dum)
label var prtype2dum1 "Civil Society/Private Sector/Third Party"
label var prtype2dum2 "Government"
label var prtype2dum3 "Multilateral Org"


* Natural log of total disbursements in each phase
foreach ph in 1 2 {
	gen lndisb_sum_phase`ph' = ln(disb_sum_phase`ph')
}

* Names used by the regressions below
rename ph1numdisb p1disbcount
foreach ph in 1 2 {
	rename phase`ph'averagescore phase`ph'averagescore_cont
}
rename component_ disease

* Disease group: 1 HIV/AIDS or HIV/TB, 2 TB, 3 Malaria, 4 any other non-blank
* component (labelled HSS). A blank component stays missing.
tab disease
g dis = .
replace dis = 4 if disease != ""
replace dis = 1 if inlist(disease, "HIV/AIDS", "HIV/TB")
replace dis = 2 if disease == "TB"
replace dis = 3 if disease == "Malaria"
label define dis 1 "HIV/AIDS, HIV&TB" 2 "TB" 3 "Malaria" 4 "HSS"
label values dis dis
tab dis, g(disdum)
label var dis "GF Disease"

**Hand-entered HIV prevalence values for 4 countries. Source: UNAIDS report 2011.

replace p_unaids2004=1319288 if country=="Congo (Democratic Republic)"
replace p_unaids2008=1333334 if country=="Congo (Democratic Republic)"
replace p_unaids2003=9185 if country=="Cuba"
replace p_unaids2008=9528 if country=="Cuba"
replace p_unaids2003=14091 if country=="Estonia"
replace p_unaids2008=14332 if country=="Estonia"
replace p_unaids2004=2209408 if country=="Ethiopia"
replace p_unaids2005=1659218 if country=="Ethiopia"
replace p_unaids2009=1890796 if country=="Ethiopia"

* Prevalence of the grant's own disease in each year. It starts at 0 and is
* filled from the malaria, TB or UNAIDS series. The UNAIDS series is only used
* through 2009, so diseaseprev2010 stays 0 for HIV/AIDS and HIV/TB grants, as
* does every year for grants of any other component.
forval i=2002/2010 {
	gen diseaseprev`i'=0
	replace diseaseprev`i'=mprev`i' if disease=="Malaria"
	replace diseaseprev`i'=tbprev`i' if disease=="TB"
	if `i' <= 2009 {
		replace diseaseprev`i'=p_unaids`i' if inlist(disease, "HIV/AIDS", "HIV/TB")
	}
}

* Prevalence in the programme start year. Group 1 (HIV) is only filled for
* start years through 2009; every other row is filled through 2010.
g diseaseprevstartyear = .
forval i = 2002/2010 {
	replace diseaseprevstartyear = diseaseprev`i' if progstartdateyear == `i' & (dis != 1 | `i' <= 2009)
}

*HIV grants starting after 2009 have no prevalence data, so use the 2009 value.
*Few of these grants enter the analyses, which keep only grants ending before 2012.

replace diseaseprevstartyear = diseaseprev2009 if progstartdateyear > 2009 & inlist(disease, "HIV/AIDS", "HIV/TB")

* Total health expenditure per capita, by year
forval i = 2002/2009 {
	g totalhealthexppercap`i' = theusd`i'/population`i'
}

* Value in the programme start year; start years after 2009 take the 2009 value
g thepcstartyear = .
forval i = 2002/2009 {
	replace thepcstartyear = totalhealthexppercap`i' if progstartdateyear == `i'
}
replace thepcstartyear = totalhealthexppercap2009 if progstartdateyear > 2009
g lnthepcstartyear = ln(thepcstartyear)

* Remaining start-year country variables. Each entry lists: new variable,
* stub of the yearly source variables, last available year, and whether a
* natural-log version is also created.
foreach spec in "gestartyear ge 2010 nolog" "gdppercapstartyear gdp_pc 2011 log" "dahpcstartyear DISBURSEMENT_CONS_09_PC 2011 log" {
	local target : word 1 of `spec'
	local stub : word 2 of `spec'
	local lastyr : word 3 of `spec'
	local wantlog : word 4 of `spec'
	g `target' = .
	forval i = 2002/`lastyr' {
		replace `target' = `stub'`i' if progstartdateyear == `i'
	}
	if "`wantlog'" == "log" {
		g ln`target' = ln(`target')
	}
}

* Deaths from the grant's own disease in each year. The value starts at 0 and
* is filled from the malaria, TB or HIV series. The HIV series d2002-d2009 is
* only used through 2009, so deaths2010 stays 0 for HIV/AIDS and HIV/TB grants.
forval i=2002/2010 {
	gen deaths`i'=0
	replace deaths`i'=malariadeaths`i' if disease=="Malaria"
	replace deaths`i'=tbdeaths`i' if disease=="TB"
}
forval i=2002/2009 {
	replace deaths`i'=d`i' if disease=="HIV/AIDS" | disease=="HIV/TB"
}

* Deaths in the programme start year
g deathstartyear = .
forval i = 2002/2010 {
	replace deathstartyear = deaths`i' if progstartdateyear == `i'
}

* HIV grants starting after 2009 take the 2009 value. HIV/TB is included here
* as well: it draws on the same d series and would otherwise keep the
* placeholder 0 for a 2010 start. This matches the prevalence imputation.
replace deathstartyear = deaths2009 if progstartdateyear > 2009 & (disease=="HIV/AIDS" | disease=="HIV/TB")

set more off

* Rescaled versions for readable regression coefficients
gen diseaseprevstartyear_1000000 = diseaseprevstartyear/1000000
gen diseaseprevstartyear_10000 = diseaseprevstartyear/10000
gen deathstartyear_10000 = deathstartyear/10000

* Prevalence at the end of each phase:
*   p1endprev - prevalence in the year the GF signed Phase 2; for grants with
*               no Phase 2 signing year and no Phase 2 disbursement count, the
*               year of the latest disbursement is used instead
*   p2endprev - prevalence in the year of the latest disbursement
* NOTE(review): diseaseprev2010 is the placeholder 0 for HIV/AIDS and HIV/TB
* grants, because the UNAIDS series is only filled through 2009 earlier in this
* file. An HIV grant whose year here is 2010 therefore gets an end prevalence
* of 0 and a p1changeprev of -1. Left unchanged because p1changeprev enters the
* published regressions; confirm before relying on it.
g p1endprev = .
g p2endprev = .

forval i = 2002/2010 {
	replace p1endprev = diseaseprev`i' if p2sigdategfyear == `i'
	replace p2endprev = diseaseprev`i' if latestdisbursedateyear==`i'
	replace p1endprev = diseaseprev`i' if latestdisbursedateyear==`i' & p2sigdategfyear ==. & ph2numdisb==.
}

* Proportional change in prevalence over Phase 1
g p1beginningprev = diseaseprevstartyear
g p2beginningprev = p1endprev
g p1changeprev = (p1endprev-p1beginningprev)/p1beginningprev
* replace lets the do-file be rerun when the output file already exists
save "$output/GFgrants-forregs.dta", replace


****************************************************
********************* ANALYSIS *********************
****************************************************

*REGRESSION GROUP #1: PREDICTORS OF HAVING ANY (GREATER THAN ZERO) PHASE 2 DISBURSEMENTS

use "$output/GFgrants-forregs.dta",clear

* Outcome: 1 when the Phase 2 amount differs from 0.
* NOTE(review): a missing amount also differs from 0 and is coded 1; confirm.
g phase2nonzero = p2gaamtusdequiv != 0

* Control sets shared by the regression groups
global grantvars "lndisb_sum_phase1 lfa2dum1 lfa2dum2 prtype2dum1 prtype2dum2  p1disbcount progstartdateyear"
global countryvars "gestartyear lndahpcstartyear lnthepcstartyear lngdppercapstartyear" 
global grantscores "phase1averagescore_cont"

* Only grants that ended before 2012
keep if gaenddateyear < 2012 

* Linear probability models first, then probits. For each disease group:
* score only, plus grant controls, plus country controls and prevalence.
foreach est in reg probit {
	forval diseaseno = 1/3 {
		`est' phase2nonzero phase1averagescore_cont if dis == `diseaseno', robust
		`est' phase2nonzero phase1averagescore_cont $grantvars if dis == `diseaseno', robust 
		`est' phase2nonzero phase1averagescore_cont $grantvars $countryvars diseaseprevstartyear_1000000 if dis == `diseaseno', robust
	}
}
	
*REGRESSION GROUP #2. PHASE 2 DISBURSEMENTS IN LEVELS

**Grants that end before 2012

* Reload the analysis file and keep only grants that ended before 2012
use "$output/GFgrants-forregs.dta",clear
keep if gaenddateyear < 2012 

global grantscores "phase1averagescore_cont"
global grantvars "lndisb_sum_phase1 lfa2dum1 lfa2dum2 prtype2dum1 prtype2dum2 p1disbcount progstartdateyear"
global countryvars "gestartyear lndahpcstartyear lnthepcstartyear lngdppercapstartyear" 

* Log Phase 2 disbursements on the Phase 1 score, by disease group:
* score only, plus grant controls, plus country controls and prevalence.
forval diseaseno = 1/3 {
	local sample "if dis == `diseaseno', robust"
	reg lndisb_sum_phase2 $grantscores `sample'
	reg lndisb_sum_phase2 $grantscores $grantvars `sample'
	reg lndisb_sum_phase2 $grantscores  $grantvars $countryvars diseaseprevstartyear_1000000 `sample'
}

*REGRESSION GROUP #3: PHASE 2 SCORES AS DEPENDENT VARIABLES
*Phase 2 scores
cd "$regs"

* Phase 2 average score by disease group. The right-hand side grows one step at
* a time: grant controls, then country controls, then the Phase 1 prevalence
* change. The data in memory are still the grants that ended before 2012.
forval diseaseno = 1/3 {
	local rhs ""
	foreach addition in "$grantvars" "$countryvars" "p1changeprev" {
		local rhs "`rhs' `addition'"
		reg phase2averagescore_cont `rhs' if dis == `diseaseno', robust
	}
}

***PROBIT MODELS FOR CHECK

global grantvars "lndisb_sum_phase1 lfa2dum1 lfa2dum2 prtype2dum1 prtype2dum2 p1disbcount progstartdateyear"
global countryvars "gestartyear lndahpcstartyear lnthepcstartyear lngdppercapstartyear" 
*Phase 2 scores
cd "$regs"

* Ordered probit check of the Phase 2 score models, one set of three
* specifications per disease group. The disease group now comes from the loop
* variable, so each model is estimated once instead of three identical times.
forval diseaseno = 1/3 {
	oprobit phase2averagescore_cont $grantvars if dis == `diseaseno'
	oprobit phase2averagescore_cont $grantvars $countryvars if dis == `diseaseno'
	oprobit phase2averagescore_cont $grantvars $countryvars p1changeprev if dis == `diseaseno'
}
	
	
**SUMMARY STATISTICS TABLE, RESTRICTED TO GRANTS THAT ENTER THE REGRESSIONS
cd "$input"
* clear is required: the data in memory were changed by the earlier keep, so a
* plain use stops with an error instead of reloading the file.
use "$output/GFgrants-forregs.dta", clear

*drop score1 score2 score3 score1d* score2d* score3d*

* Flag the estimation sample of the Phase 2 score regression for each disease
* group. Residuals exist only for rows with complete data, so a non-missing
* residual marks a row that is in that sample.
* NOTE(review): no end-year restriction is applied here, while the Phase 2 score
* regressions above ran on grants ending before 2012. Confirm which is intended.
forval diseaseno = 1/3 {
	reg phase2averagescore_cont $grantvars if dis == `diseaseno', robust 
	predict score`diseaseno' if dis == `diseaseno', residuals
	g score`diseaseno'dum = 1 if score`diseaseno' != .
	sum score`diseaseno' score`diseaseno'dum
}

* 1 when the grant is in the score-regression sample of any disease group
g scoretotal = 0 if score1dum == . & score2dum == . & score3dum == .
replace scoretotal = 1 if scoretotal == .
sum scoretotal
tab scoretotal
order scoretotal score1dum score1 score2dum score2 score3dum score3

*PHASE 2 DISBURSEMENTS IN LEVELS
global grantscores "phase1averagescore_cont"
global grantvars "lndisb_sum_phase1 lfa2dum1 lfa2dum2 prtype2dum1 prtype2dum2 p1disbcount progstartdateyear"
global countryvars "gestartyear lndahpcstartyear lnthepcstartyear lngdppercapstartyear" 

* Flag the estimation sample of the Phase 2 disbursement regression (score-only
* specification, grants ending before 2012) for each disease group. A
* non-missing residual marks a row that is in the sample.
forval diseaseno = 1/3 {
	local sample "if dis == `diseaseno' & gaenddateyear < 2012"
	reg lndisb_sum_phase2 $grantscores `sample' , robust
	predict disbsumph2`diseaseno' `sample', residuals
	g disbsumph2`diseaseno'dum = 1 if disbsumph2`diseaseno' != .
	sum disbsumph2`diseaseno' disbsumph2`diseaseno'dum
}

* 1 when the grant is in that sample for any disease group, 0 otherwise
g disbsumph2total = !missing(disbsumph21dum) | !missing(disbsumph22dum) | !missing(disbsumph23dum)
sum disbsumph2total
tab disbsumph2total
order disbsumph2total disbsumph21dum disbsumph21 disbsumph22dum disbsumph22 disbsumph23dum disbsumph23

*HAVING ANY (GREATER THAN ZERO) PHASE 2 DISBURSEMENTS
* Outcome: 1 when the Phase 2 amount differs from 0.
* NOTE(review): a missing amount also differs from 0 and is coded 1; confirm.
g phase2nonzero = p2gaamtusdequiv != 0

* Flag the estimation sample of the any-Phase-2 regression (score-only
* specification, grants ending before 2012) for each disease group.
forval diseaseno = 1/3 {
	local sample "if dis == `diseaseno' & gaenddateyear < 2012"
	reg phase2nonzero phase1averagescore_cont `sample' , robust
	predict ph2nonzero`diseaseno' `sample', residuals
	g ph2nonzero`diseaseno'dum = 1 if ph2nonzero`diseaseno' != .
	sum ph2nonzero`diseaseno' ph2nonzero`diseaseno'dum
}

* 1 when the grant is in that sample for any disease group, 0 otherwise
g ph2nonzerototal = !missing(ph2nonzero1dum) | !missing(ph2nonzero2dum) | !missing(ph2nonzero3dum)
sum ph2nonzerototal
tab ph2nonzerototal
order ph2nonzerototal ph2nonzero1dum ph2nonzero1 ph2nonzero2dum ph2nonzero2 ph2nonzero3dum ph2nonzero3

* 1 when the grant is in at least one of the three regression samples
g allobsforallregs = scoretotal != 0 | disbsumph2total != 0 | ph2nonzerototal != 0
tab allobsforallregs

***TABLE 1
* Summary statistics by disease group (1 HIV, 2 TB, 3 Malaria), restricted to
* grants flagged as entering at least one regression sample
local table1vars disb_sum_phase1 disb_sum_phase2 phase2nonzero phase1averagescore_cont phase2averagescore_cont lfa2dum* prtype2dum* diseaseprevstartyear progstartdateyear gdppercapstartyear thepcstartyear dahpcstartyear
forval diseaseno = 1/3 {
	sum `table1vars' if allobsforallregs==1 & dis==`diseaseno'
}
